5. Spectral Analysis


Table of Contents

[1]:
# Import necessary modules
# Standard library / third-party names used directly in this notebook are
# imported explicitly rather than relying on the wildcard import below
import os

import numpy as np
import pandas as pd
from IPython.display import display

from Xpectra.SpecFitAnalyzer import SpecFitAnalyzer
from Xpectra.LineAssigner import *
from Xpectra.SpecStatVisualizer import plot_fitted_als_bokeh, plot_spectra_errorbar_bokeh

5.1 - Load the processed spectra and the line centers

\(\rightarrow\) In the previous step, we located the peaks in the methane laboratory spectrum. Let’s load them, as well as the spectrum itself:

[2]:
# Call environment variable and assign path to data
__reference_data_path__ = os.getenv("Xpectra_reference_data")
if __reference_data_path__ is None:
    # os.getenv returns None when the variable is unset; fail here with an
    # actionable message instead of a cryptic TypeError inside os.path.join
    raise EnvironmentError(
        "Environment variable 'Xpectra_reference_data' is not set. "
        "Point it to the Xpectra reference data directory and restart the kernel."
    )

# Import baseline corrected spectrum
corrected_spectrum = pd.read_csv(os.path.join(__reference_data_path__,'processed_data','arpls_baseline_corrected_methane_spectrum.csv'))

# Assign wavenumber (x) and signal (y) arrays
# NOTE: NaNs are dropped per column, so each x/y pair is checked for equal length below
x = corrected_spectrum['original_x'].dropna().to_numpy()
y = corrected_spectrum['original_y'].dropna().to_numpy()

x_baseline_corr = corrected_spectrum['baseline_corrected_x'].dropna().to_numpy()
y_baseline_corr = corrected_spectrum['baseline_corrected_y'].dropna().to_numpy()

# Sanity checks: every wavenumber must have a matching signal value
assert len(x) == len(y), "original_x and original_y differ in length after dropping NaNs"
assert len(x_baseline_corr) == len(y_baseline_corr), \
    "baseline_corrected_x and baseline_corrected_y differ in length after dropping NaNs"
[3]:
# Preview the first rows to confirm which columns were loaded
corrected_spectrum.head()
[3]:
original_x original_y cleaned_x cleaned_y baseline_corrected_x baseline_corrected_y
0 2898.543060 0.643845 2898.543060 0.440297 2898.543060 0.014491
1 2898.543908 0.646336 2898.543908 0.436436 2898.543908 0.017204
2 2898.544766 0.645778 2898.544766 0.437300 2898.544766 0.016869
3 2898.545133 0.639101 2898.545133 0.447693 2898.545133 0.010415
4 2898.545638 0.630384 2898.545638 0.461426 2898.545638 0.001921
[4]:
# Read the table of peak centers/heights from the processed-data folder
spectral_lines_path = os.path.join(__reference_data_path__, 'processed_data', "closest_hitran_lines_auto.csv")
spectral_lines = pd.read_csv(spectral_lines_path)

# Pull the two columns needed for the fit out as NumPy arrays
peak_locations, peak_heights = (
    spectral_lines[column].to_numpy() for column in ("peak_center", "peak_heights")
)

\(\rightarrow\) Visualize the imported spectra:

[5]:
# The baseline is whatever the correction removed:
# original signal minus baseline-corrected signal
spectral_baseline = y - y_baseline_corr

# Plot the original spectrum together with the recovered baseline
baseline_plot_kwargs = dict(
    wavenumber_values = x,
    signal_values = y,
    fitted_baseline = spectral_baseline,
    baseline_type = 'arpls',
)
plot_fitted_als_bokeh(**baseline_plot_kwargs)
Loading BokehJS ...

\(\rightarrow\) Create the array of initial guesses for the Gaussian peak parameters (center, height, width):

[6]:
# Every peak needs an initial guess for {center, height, width}
num_peaks = len(peak_locations)

# Column vectors, each of shape (num_peaks, 1)
peak_guesses = np.array(peak_locations).reshape(-1, 1)
height_guesses = np.array(peak_heights).reshape(-1, 1)
width_guesses = np.full((num_peaks, 1), 0.001)  # same starting width for all peaks

# Initial guesses: shape (num_peaks, 3), one row per peak -> [center, height, width]
initial_guesses = np.concatenate((peak_guesses, height_guesses, width_guesses), axis=1)

5.2 - Fit the spectral peaks

The Xpectra.SpecFitAnalyzer module has three essential purposes: processing spectral data (completed), fitting and correcting the spectral baseline (completed), and identifying and fitting spectral peaks (current).

At this step, we use line profile functions to model the shape of the spectral peaks and extract spectroscopic parameters.

5.2.1 - Select wavenumber range for analysis

[7]:
# Window of the spectrum to analyze; reused by the plotting and fitting cells
wavenumber_range = (2911.15, 2911.9) # cm^-1
[8]:
# Show the baseline-corrected spectrum restricted to the selected window
range_plot_kwargs = dict(
    wavenumber_values = x_baseline_corr,
    signal_values = y_baseline_corr,
    wavenumber_range = wavenumber_range,
    absorber_name = 'CH4',
    plot_type = 'line',
)
plot_spectra_errorbar_bokeh(**range_plot_kwargs)
Loading BokehJS ...

5.2.2 - Initialize class

\(\rightarrow\) Initialize SpecFitAnalyzer with the baseline-corrected spectrum, the absorber name, and the reference-data path (read from the environment variable)

[9]:
# Build the analyzer from the baseline-corrected spectrum
specfit = SpecFitAnalyzer(
    wavenumber_values = x_baseline_corr,
    y_baseline_corrected = y_baseline_corr,
    absorber_name = 'CH4',
    __reference_data__ = __reference_data_path__,  # reference-data directory from the environment
)

5.2.3 - Fit Spectrum and Visualize

\(\rightarrow\) Here are the line profile options:

  • Gaussian(x) = amplitude * np.exp(-(x - center) ** 2 / (2 * width ** 2))

  • Lorentzian(x) = amplitude / (1 + ((x - center) / width) ** 2)

  • Voigt(x) = amplitude * np.real(wofz(z)).astype(float) / (sigma * np.sqrt(2 * np.pi))

    • sigma = wid_g / np.sqrt(2 * np.log(2))

    • gamma = wid_l / 2

    • z = ((x - center) + 1j * gamma) / (sigma * np.sqrt(2))

\(\rightarrow\) Fit spectral lines with Gaussian line profile

[10]:
# Options for the interactive Gaussian fit
gaussian_fit_options = dict(
    line_profile = 'gaussian', # Default
    wavenumber_range = wavenumber_range,
    __plot_bokeh__ = True,
    __print__ = True, # Display fitted params and error bars
)
specfit.fit_spectrum(initial_guesses, **gaussian_fit_options)
Loading BokehJS ...
Peak Number Center Center Error Intensity Intensity Error Width Width Error
0 1 2911.6974 0.0012 0.214 0.0566 0.004 0.0012
1 2 2911.6764 0.0025 0.104 0.0543 0.004 0.0025
2 3 2911.6230 0.0004 0.576 0.0468 0.005 0.0004
3 4 2911.5187 0.0016 0.169 0.0544 0.004 0.0016
4 5 2911.4009 0.0005 0.475 0.0491 0.004 0.0005
5 6 2911.3481 0.0004 0.579 0.0442 0.005 0.0005
6 7 2911.2862 0.0006 0.432 0.0508 0.005 0.0006
7 8 2911.2618 0.0004 0.609 0.0403 0.006 0.0004
8 9 2911.1865 0.0005 0.537 0.0492 0.004 0.0005

\(\rightarrow\) Exploratory check of the fitted parameters: compare the initial guesses with the fitted values.

[11]:
# Guessed and fitted parameters as arrays; columns are (center, intensity, width)
guess_arr = np.asarray(initial_guesses)
fit_arr = np.asarray(specfit.fitted_params)

# The side-by-side comparison pairs rows by position, so both arrays
# must describe the same number of peaks
assert len(guess_arr) == len(fit_arr), (
    f"{len(guess_arr)} initial guesses but {len(fit_arr)} fitted peaks; cannot compare row by row"
)

# Create DataFrame with fitted vs. guessed params.
# A dedicated name is used instead of the generic `df`, which is reassigned
# later in the notebook for a different table.
guess_vs_fit = pd.DataFrame({
    f'{param}_{source}': values[:, col]
    for col, param in enumerate(('center', 'intensity', 'width'))
    for source, values in (('guess', guess_arr), ('fit', fit_arr))
})
display(guess_vs_fit)
center_guess center_fit intensity_guess intensity_fit width_guess width_fit
0 2911.697485 2911.697411 0.210332 0.213843 0.001 0.004055
1 2911.676406 2911.676367 0.106295 0.103791 0.001 0.004023
2 2911.623123 2911.623013 0.571206 0.575718 0.001 0.004631
3 2911.518359 2911.518673 0.168570 0.168550 0.001 0.004378
4 2911.400698 2911.400869 0.471979 0.475202 0.001 0.004427
5 2911.348160 2911.348104 0.572617 0.579227 0.001 0.005059
6 2911.286321 2911.286236 0.439682 0.432089 0.001 0.004633
7 2911.261846 2911.261797 0.608428 0.609348 0.001 0.005632
8 2911.186087 2911.186549 0.524801 0.536988 0.001 0.004440

5.3 - Save the results: plots and DataFrames

\(\rightarrow\) Plot with Seaborn and save as a PDF

[12]:
# Re-run the Gaussian fit, this time producing the static figure
specfit.fit_spectrum(
    initial_guesses,
    line_profile = 'gaussian', # Default
    wavenumber_range = wavenumber_range,
    __plot_seaborn__ = True,
    __save_plots__ = True, # Save the figure to file
    __show_plots__ = False,
)
<Figure size 7000x4200 with 0 Axes>

\(\rightarrow\) Save fitted parameters in a CSV file

[13]:
# Tabulate the fitted parameters with named columns for export
df = pd.DataFrame(specfit.fitted_params, columns = ['center','height','width'])
[14]:
# Inspect the table before writing it to disk
display(df)
center height width
0 2911.697411 0.213843 0.004055
1 2911.676367 0.103791 0.004023
2 2911.623013 0.575718 0.004631
3 2911.518673 0.168550 0.004378
4 2911.400869 0.475202 0.004427
5 2911.348104 0.579227 0.005059
6 2911.286236 0.432089 0.004633
7 2911.261797 0.609348 0.005632
8 2911.186549 0.536988 0.004440
[15]:
# Destination file inside the processed-data folder
file_name = "fitted_gaussian_lines.csv"
output_csv_path = os.path.join(__reference_data_path__, 'processed_data', file_name)

# Write the fitted parameters, leaving out the positional index column
df.to_csv(output_csv_path, index=False)